#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')
library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr)
library(biomaRt)
library(anRichment) ; library(BrainDiseaseCollection)
suppressWarnings(suppressMessages(library(WGCNA)))
SFARI_colour_hue = function(r) {
pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}
Load preprocessed dataset (preprocessing code in 20_02_25_data_preprocessing.Rmd) and clustering (pipeline in 20_02_25_WGCNA.Rmd)
# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame
# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
mutate('ID'=as.character(ensembl_gene_id)) %>%
dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
mutate('Neuronal'=1)
# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_08-29-2019_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]
# Clusterings
clusterings = read_csv('./../Data/clusters.csv')
# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>%
mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`)) %>%
left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
mutate(gene.score=ifelse(`gene-score`=='None' & Neuronal==1, 'Neuronal', `gene-score`),
significant=padj<0.05 & !is.na(padj))
# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)
genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))
clustering_selected = 'DynamicHybridMergedSmall'
genes_info$Module = genes_info[,clustering_selected]
dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]
rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)
Using the hetcor function, that calculates Pearson, polyserial or polychoric correlations depending on the type of variables involved.
datTraits = datMeta %>% dplyr::select(Diagnosis, Sex, Age, PMI, RNAExtractionBatch) %>%
dplyr::rename('ExtractionBatch' = RNAExtractionBatch)
# Recalculate MEs with color labels
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)
# Calculate correlation between eigengenes and the traits and their p-values
moduleTraitCor = MEs %>% apply(2, function(x) hetcor(x, datTraits)$correlations[1,-1]) %>% t
rownames(moduleTraitCor) = colnames(MEs)
colnames(moduleTraitCor) = colnames(datTraits)
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nrow(datExpr))
# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)
# In case there are any NAs
if(sum(!complete.cases(moduleTraitCor))>0){
print(paste0(sum(is.na(moduleTraitCor)),' correlation(s) could not be calculated'))
}
## [1] "2 correlation(s) could not be calculated"
rm(ME_object)
Note: The correlation between Modules #00C1A2, #C19B00 and Diagonsis are the ones that cannot be calculated, weirdly enough, the thing that causes the error is that the initial correlation is too high, so it would be a very bad thing to lose this module because of this numerical error. I’m going to fill in its value using the polyserial function, which doesn’t give exactly the same results as the hetcor() function, but it’s quite similar.
# Calculate the correlation tha failed with hetcor()
moduleTraitCor['ME#00C1A2','Diagnosis'] = polyserial(MEs[,'ME#00C1A2'], datTraits$Diagnosis)
## Warning in polyserial(MEs[, "ME#00C1A2"], datTraits$Diagnosis): initial
## correlation inadmissible, -1.09270710407704, set to -0.9999
moduleTraitCor['ME#C19B00','Diagnosis'] = polyserial(MEs[,'ME#C19B00'], datTraits$Diagnosis)
## Warning in polyserial(MEs[, "ME#C19B00"], datTraits$Diagnosis): initial
## correlation inadmissible, 1.04877229459179, set to 0.9999
I’m going to select all the modules that have an absolute correlation higher than 0.9 with Diagnosis to study them
# Sort moduleTraitCor by Diagnosis
moduleTraitCor = moduleTraitCor[order(moduleTraitCor[,1], decreasing=TRUE),]
moduleTraitPvalue = moduleTraitPvalue[order(moduleTraitCor[,1], decreasing=TRUE),]
# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)
labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels = gsub('ME','',rownames(moduleTraitCor)),
yColorWidth=0, colors = brewer.pal(11,'PiYG'), bg.lab.y = gsub('ME','',rownames(moduleTraitCor)),
textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.8, cex.lab.y = 0.75, zlim = c(-1,1),
main = paste('Module-Trait relationships'))
diagnosis_cor = data.frame('Module' = gsub('ME','',rownames(moduleTraitCor)),
'MTcor' = moduleTraitCor[,'Diagnosis'],
'MTpval' = moduleTraitPvalue[,'Diagnosis'])
genes_info = genes_info %>% left_join(diagnosis_cor, by='Module')
rm(moduleTraitPvalue, datTraits, textMatrix, diagnosis_cor)
The modules consist mainly of points with very high (absolute) values in PC2 (which we know is related to lfc), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules
top_modules = gsub('ME','',rownames(moduleTraitCor)[abs(moduleTraitCor[,'Diagnosis'])>0.9])
cat(paste0('Top modules selected: ', paste(top_modules, collapse=', '),'\n'))
## Top modules selected: #C19B00, #00C1A2
pca = datExpr %>% prcomp
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
gene_id = paste0(ID, ' (', external_gene_id, ')'))
table(plot_data$ImportantModules)
##
## #00C1A2 #C19B00 Others
## 1315 2141 13035
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) +
geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
ggtitle('Modules with strongest relation to Diagnosis'))
rm(pca)
SFARI Genes seem to have high levels of Gene Significance and Module Membership in the Modules
create_plot = function(module){
plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% filter(dataset$Module==module)
colnames(plot_data)[2] = 'Module'
SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='None'])))
p = ggplotly(plot_data %>% ggplot(aes(Module, GS, color=gene.score)) + geom_point(alpha=0.5, aes(ID=ID)) + ylab('Gene Significance') +
scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,8))) + theme_minimal() + xlab('Module Membership') +
ggtitle(paste0('Module ', module,' (MTcor = ', round(moduleTraitCor[paste0('ME',module),1],2),')')))
return(p)
}
create_plot(top_modules[1])
create_plot(top_modules[2])
rm(create_plot)
List of SFARI Genes in top modules ordered by SFARI score and Gene Significance
table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>% arrange(gene.score, desc(abs(GS))) %>%
dplyr::rename('Ensembl ID'=ID, 'Gene Symbol'=external_gene_id,
'SFARI score'=gene.score, 'Gene Significance'=GS)
kable(table_data %>% filter(Module == top_modules[1] & `SFARI score` != 'None') %>% dplyr::select(-Module),
caption=paste0('SFARI Genes for Module ', top_modules[1]))
| Ensembl ID | Gene Symbol | Gene Significance | SFARI score |
|---|---|---|---|
| ENSG00000141431 | ASXL3 | 0.8785457 | 1 |
| ENSG00000119335 | SET | 0.9218670 | 2 |
| ENSG00000155511 | GRIA1 | 0.8835654 | 2 |
| ENSG00000169862 | CTNND2 | 0.8586459 | 2 |
| ENSG00000196557 | CACNA1H | 0.8552054 | 2 |
| ENSG00000169432 | SCN9A | 0.8247991 | 2 |
| ENSG00000114166 | KAT2B | 0.7290687 | 2 |
| ENSG00000147050 | KDM6A | 0.7203022 | 2 |
| ENSG00000177565 | TBL1XR1 | 0.6913837 | 2 |
| ENSG00000021574 | SPAST | 0.6804570 | 2 |
| ENSG00000103507 | BCKDK | 0.5763334 | 2 |
| ENSG00000179915 | NRXN1 | 0.5254140 | 2 |
| ENSG00000079482 | OPHN1 | 0.9850829 | 3 |
| ENSG00000116117 | PARD3B | 0.9747438 | 3 |
| ENSG00000109911 | ELP4 | 0.9225894 | 3 |
| ENSG00000136425 | CIB2 | 0.8722078 | 3 |
| ENSG00000164050 | PLXNB1 | 0.8657312 | 3 |
| ENSG00000181722 | ZBTB20 | 0.8349265 | 3 |
| ENSG00000196839 | ADA | 0.8235366 | 3 |
| ENSG00000076716 | GPC4 | 0.8058872 | 3 |
| ENSG00000131165 | CHMP1A | 0.8023088 | 3 |
| ENSG00000162946 | DISC1 | 0.7804632 | 3 |
| ENSG00000196092 | PAX5 | 0.6988833 | 3 |
| ENSG00000118432 | CNR1 | 0.6828557 | 3 |
| ENSG00000205581 | HMGN1 | 0.6671115 | 3 |
| ENSG00000149571 | KIRREL3 | 0.6477266 | 3 |
| ENSG00000089006 | SNX5 | 0.5934084 | 3 |
| ENSG00000139726 | DENR | 0.5868767 | 3 |
| ENSG00000177807 | KCNJ10 | 0.5603723 | 3 |
| ENSG00000100033 | PRODH | 0.5527687 | 3 |
| ENSG00000165995 | CACNB2 | 0.5473487 | 3 |
| ENSG00000164418 | GRIK2 | 0.5458645 | 3 |
| ENSG00000158321 | AUTS2 | 0.5084040 | 3 |
| ENSG00000128849 | CGNL1 | 0.4705224 | 3 |
| ENSG00000137672 | TRPC6 | 0.4518504 | 3 |
| ENSG00000114062 | UBE3A | 0.4389561 | 3 |
| ENSG00000140945 | CDH13 | 0.4193203 | 3 |
| ENSG00000107077 | KDM4C | 0.3672602 | 3 |
| ENSG00000152910 | CNTNAP4 | 0.2005084 | 3 |
| ENSG00000182985 | CADM1 | 0.9339505 | 4 |
| ENSG00000147862 | NFIB | 0.9310289 | 4 |
| ENSG00000152208 | GRID2 | 0.9112596 | 4 |
| ENSG00000149403 | GRIK4 | 0.9071851 | 4 |
| ENSG00000156298 | TSPAN7 | 0.9050752 | 4 |
| ENSG00000147044 | CASK | 0.9039260 | 4 |
| ENSG00000103528 | SYT17 | 0.9028547 | 4 |
| ENSG00000107104 | KANK1 | 0.8876488 | 4 |
| ENSG00000129911 | KLF16 | 0.8872244 | 4 |
| ENSG00000005379 | BZRAP1 | 0.8579167 | 4 |
| ENSG00000080298 | RFX3 | 0.8542825 | 4 |
| ENSG00000169439 | SDC2 | 0.8467704 | 4 |
| ENSG00000115159 | GPD2 | 0.8381263 | 4 |
| ENSG00000131374 | TBC1D5 | 0.8372299 | 4 |
| ENSG00000172554 | SNTG2 | 0.8160934 | 4 |
| ENSG00000183098 | GPC6 | 0.8106561 | 4 |
| ENSG00000197977 | ELOVL2 | 0.7839541 | 4 |
| ENSG00000119125 | GDA | 0.7503866 | 4 |
| ENSG00000162599 | NFIA | 0.7478687 | 4 |
| ENSG00000113100 | CDH9 | 0.7457152 | 4 |
| ENSG00000071246 | VASH1 | 0.7427122 | 4 |
| ENSG00000102882 | MAPK3 | 0.7292824 | 4 |
| ENSG00000105926 | MPP6 | 0.7203155 | 4 |
| ENSG00000150394 | CDH8 | 0.7175609 | 4 |
| ENSG00000189221 | MAOA | 0.7097246 | 4 |
| ENSG00000151612 | ZNF827 | 0.7063249 | 4 |
| ENSG00000156011 | PSD3 | 0.7010282 | 4 |
| ENSG00000111961 | SASH1 | 0.6992862 | 4 |
| ENSG00000166736 | HTR3A | 0.6679956 | 4 |
| ENSG00000164638 | SLC29A4 | 0.6491610 | 4 |
| ENSG00000137713 | PPP2R1B | 0.6429055 | 4 |
| ENSG00000100852 | ARHGAP5 | 0.6413589 | 4 |
| ENSG00000101958 | GLRA2 | 0.6321659 | 4 |
| ENSG00000189283 | FHIT | 0.6091543 | 4 |
| ENSG00000175745 | NR2F1 | 0.6052473 | 4 |
| ENSG00000170004 | CHD3 | 0.6035037 | 4 |
| ENSG00000102781 | KATNAL1 | 0.6026056 | 4 |
| ENSG00000157152 | SYN2 | 0.5936906 | 4 |
| ENSG00000070367 | EXOC5 | 0.5676244 | 4 |
| ENSG00000007314 | SCN4A | 0.5606462 | 4 |
| ENSG00000188641 | DPYD | 0.5426447 | 4 |
| ENSG00000153266 | FEZF2 | 0.5371557 | 4 |
| ENSG00000198286 | CARD11 | 0.4951990 | 4 |
| ENSG00000144036 | EXOC6B | 0.4711673 | 4 |
| ENSG00000198690 | FAN1 | 0.4503471 | 4 |
| ENSG00000153187 | HNRNPU | 0.4463083 | 4 |
| ENSG00000224389 | C4B | 0.4037258 | 4 |
| ENSG00000145794 | MEGF10 | 0.3904062 | 4 |
| ENSG00000157890 | MEGF11 | 0.3753489 | 4 |
| ENSG00000099954 | CECR2 | 0.3398445 | 4 |
| ENSG00000131771 | PPP1R1B | 0.2518863 | 4 |
| ENSG00000070371 | CLTCL1 | 0.1365827 | 4 |
| ENSG00000147402 | GABRQ | 0.9598506 | 5 |
| ENSG00000116852 | KIF21B | 0.8867727 | 5 |
| ENSG00000131795 | RBM8A | 0.8854904 | 5 |
| ENSG00000211460 | TSN | 0.8722982 | 5 |
| ENSG00000163288 | GABRB1 | 0.8722409 | 5 |
| ENSG00000186297 | GABRA5 | 0.8251354 | 5 |
| ENSG00000050628 | PTGER3 | 0.8224128 | 5 |
| ENSG00000100030 | MAPK1 | 0.8199824 | 5 |
| ENSG00000112038 | OPRM1 | 0.7778386 | 5 |
| ENSG00000139734 | DIAPH3 | 0.7364629 | 5 |
| ENSG00000004777 | ARHGAP33 | 0.7354770 | 5 |
| ENSG00000177791 | MYOZ1 | 0.6793381 | 5 |
| ENSG00000169855 | ROBO1 | 0.6134188 | 5 |
| ENSG00000147852 | VLDLR | 0.5714398 | 5 |
| ENSG00000124406 | ATP8A1 | 0.5590179 | 5 |
| ENSG00000172020 | GAP43 | 0.4984025 | 5 |
| ENSG00000173406 | DAB1 | 0.4866904 | 5 |
| ENSG00000178568 | ERBB4 | 0.4554541 | 5 |
| ENSG00000171444 | MCC | 0.4541775 | 5 |
| ENSG00000138639 | ARHGAP24 | 0.4530811 | 5 |
| ENSG00000101843 | PSMD10 | 0.4054877 | 5 |
| ENSG00000116254 | CHD5 | 0.4045844 | 5 |
| ENSG00000148730 | EIF4EBP2 | 0.3426925 | 5 |
| ENSG00000080224 | EPHA6 | 0.2654092 | 5 |
| ENSG00000261609 | GAN | 0.2252501 | 5 |
| ENSG00000117009 | KMO | 0.0682514 | 5 |
| ENSG00000066468 | FGFR2 | 0.0635940 | 5 |
| ENSG00000091831 | ESR1 | NA | 5 |
| ENSG00000164434 | FABP7 | 0.9663938 | 6 |
| ENSG00000172340 | SUCLG2 | 0.8271255 | 6 |
| ENSG00000160200 | CBS | 0.7295583 | 6 |
| ENSG00000072364 | AFF4 | 0.7268624 | 6 |
| ENSG00000179603 | GRM8 | 0.6767789 | 6 |
| ENSG00000075884 | ARHGAP15 | 0.5094652 | 6 |
| ENSG00000171791 | BCL2 | 0.3409641 | 6 |
kable(table_data %>% filter(Module == top_modules[2] & `SFARI score` != 'None') %>% dplyr::select(-Module),
caption=paste0('SFARI Genes for Module ', top_modules[2]))
| Ensembl ID | Gene Symbol | Gene Significance | SFARI score |
|---|---|---|---|
| ENSG00000136535 | TBR1 | -0.8840838 | 1 |
| ENSG00000174469 | CNTNAP2 | -0.9496760 | 2 |
| ENSG00000080603 | SRCAP | -0.8284032 | 2 |
| ENSG00000155974 | GRIP1 | -0.7394422 | 2 |
| ENSG00000169057 | MECP2 | -0.6872414 | 2 |
| ENSG00000105976 | MET | -0.6604030 | 2 |
| ENSG00000119866 | BCL11A | -0.6366413 | 2 |
| ENSG00000124140 | SLC12A5 | -0.9360019 | 3 |
| ENSG00000074590 | NUAK1 | -0.9348188 | 3 |
| ENSG00000078328 | RBFOX1 | -0.9186918 | 3 |
| ENSG00000144285 | SCN1A | -0.9173283 | 3 |
| ENSG00000170579 | DLGAP1 | -0.9137577 | 3 |
| ENSG00000151150 | ANK3 | -0.8831013 | 3 |
| ENSG00000183454 | GRIN2A | -0.8640176 | 3 |
| ENSG00000132294 | EFR3A | -0.8381554 | 3 |
| ENSG00000157087 | ATP2B2 | -0.8042493 | 3 |
| ENSG00000182621 | PLCB1 | -0.7900910 | 3 |
| ENSG00000166147 | FBN1 | -0.7834378 | 3 |
| ENSG00000197535 | MYO5A | -0.7768036 | 3 |
| ENSG00000165300 | SLITRK5 | -0.7720362 | 3 |
| ENSG00000132604 | TERF2 | -0.6862455 | 3 |
| ENSG00000176884 | GRIN1 | -0.6792205 | 3 |
| ENSG00000127616 | SMARCA4 | -0.6387236 | 3 |
| ENSG00000146830 | GIGYF1 | -0.6085437 | 3 |
| ENSG00000175344 | CHRNA7 | -0.5995574 | 3 |
| ENSG00000170396 | ZNF804A | -0.5809462 | 3 |
| ENSG00000170471 | RALGAPB | -0.5332836 | 3 |
| ENSG00000166148 | AVPR1A | -0.3651616 | 3 |
| ENSG00000112655 | PTK7 | -0.2882900 | 3 |
| ENSG00000196876 | SCN8A | NA | 3 |
| ENSG00000163618 | CADPS | -0.9999000 | 4 |
| ENSG00000159082 | SYNJ1 | -0.9897629 | 4 |
| ENSG00000129159 | KCNC1 | -0.9729424 | 4 |
| ENSG00000150995 | ITPR1 | -0.9570730 | 4 |
| ENSG00000135631 | RAB11FIP5 | -0.9517540 | 4 |
| ENSG00000105409 | ATP1A3 | -0.9390908 | 4 |
| ENSG00000154263 | ABCA10 | -0.9299190 | 4 |
| ENSG00000197635 | DPP4 | -0.9233889 | 4 |
| ENSG00000115840 | SLC25A12 | -0.9230049 | 4 |
| ENSG00000155886 | SLC24A2 | -0.9212817 | 4 |
| ENSG00000006377 | DLX6 | -0.9139947 | 4 |
| ENSG00000132639 | SNAP25 | -0.8940219 | 4 |
| ENSG00000144406 | UNC80 | -0.8897701 | 4 |
| ENSG00000157483 | MYO1E | -0.8613604 | 4 |
| ENSG00000102181 | CD99L2 | -0.8571033 | 4 |
| ENSG00000172260 | NEGR1 | -0.8555347 | 4 |
| ENSG00000165355 | FBXO33 | -0.8552039 | 4 |
| ENSG00000115421 | PAPOLG | -0.8488878 | 4 |
| ENSG00000144331 | ZNF385B | -0.8379079 | 4 |
| ENSG00000144290 | SLC4A10 | -0.8341455 | 4 |
| ENSG00000128594 | LRRC4 | -0.8308582 | 4 |
| ENSG00000100038 | TOP3B | -0.8235623 | 4 |
| ENSG00000163399 | ATP1A1 | -0.8174485 | 4 |
| ENSG00000101883 | RHOXF1 | -0.8112958 | 4 |
| ENSG00000087460 | GNAS | -0.8065352 | 4 |
| ENSG00000047188 | YTHDC2 | -0.8055959 | 4 |
| ENSG00000117594 | HSD11B1 | -0.8000815 | 4 |
| ENSG00000081803 | CADPS2 | -0.7852218 | 4 |
| ENSG00000117016 | RIMS3 | -0.7802470 | 4 |
| ENSG00000109906 | ZBTB16 | -0.7646739 | 4 |
| ENSG00000014138 | POLA2 | -0.7381408 | 4 |
| ENSG00000148935 | GAS2 | -0.7378531 | 4 |
| ENSG00000215045 | GRID2IP | -0.7272142 | 4 |
| ENSG00000130402 | ACTN4 | -0.7186294 | 4 |
| ENSG00000109158 | GABRA4 | -0.7160039 | 4 |
| ENSG00000174938 | SEZ6L2 | -0.7072858 | 4 |
| ENSG00000184408 | KCND2 | -0.6836501 | 4 |
| ENSG00000182901 | RGS7 | -0.6799908 | 4 |
| ENSG00000139915 | MDGA2 | -0.6440130 | 4 |
| ENSG00000042781 | USH2A | -0.6424911 | 4 |
| ENSG00000160862 | AZGP1 | -0.5968575 | 4 |
| ENSG00000264424 | MYH4 | -0.5885198 | 4 |
| ENSG00000169083 | AR | -0.0931115 | 4 |
| ENSG00000152402 | GUCY1A2 | -0.0444340 | 4 |
| ENSG00000204466 | DGKK | -0.9819312 | 5 |
| ENSG00000145740 | SLC30A5 | -0.9761635 | 5 |
| ENSG00000178718 | RPP25 | -0.9686983 | 5 |
| ENSG00000174996 | KLC2 | -0.9683999 | 5 |
| ENSG00000128739 | SNRPN | -0.9259603 | 5 |
| ENSG00000118596 | SLC16A7 | -0.9142353 | 5 |
| ENSG00000065989 | PDE4A | -0.8574696 | 5 |
| ENSG00000156463 | SH3RF2 | -0.8562647 | 5 |
| ENSG00000185666 | SYN3 | -0.8560706 | 5 |
| ENSG00000100362 | PVALB | -0.8269023 | 5 |
| ENSG00000007171 | NOS2 | -0.8216243 | 5 |
| ENSG00000082701 | GSK3B | -0.8126652 | 5 |
| ENSG00000140527 | WDR93 | -0.8066951 | 5 |
| ENSG00000151148 | UBE3B | -0.7972316 | 5 |
| ENSG00000139182 | CLSTN3 | -0.7821418 | 5 |
| ENSG00000107147 | KCNT1 | -0.7780941 | 5 |
| ENSG00000197381 | ADARB1 | -0.7529056 | 5 |
| ENSG00000106113 | CRHR2 | -0.7453032 | 5 |
| ENSG00000106123 | EPHB6 | -0.7341812 | 5 |
| ENSG00000008735 | MAPK8IP2 | -0.6747744 | 5 |
| ENSG00000157168 | NRG1 | -0.5103462 | 5 |
| ENSG00000022355 | GABRA1 | NA | 5 |
| ENSG00000104725 | NEFL | NA | 5 |
| ENSG00000171735 | CAMTA1 | NA | 5 |
| ENSG00000213023 | SYT3 | -0.8957519 | 6 |
Modules with the strongest module-diagnosis correlation should have the highest percentage of SFARI Genes, but this doesn’t seem to be the case
plot_data = dataset %>% mutate('hasSFARIscore' = gene.score!='None') %>%
group_by(Module, MTcor, hasSFARIscore) %>% summarise(p=n()) %>%
left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>%
mutate(p=round(p/n*100,2))
for(i in 1:nrow(plot_data)){
this_row = plot_data[i,]
if(this_row$hasSFARIscore==FALSE & this_row$p==100){
new_row = this_row
new_row$hasSFARIscore = TRUE
new_row$p = 0
plot_data = plot_data %>% rbind(new_row)
}
}
plot_data = plot_data %>% filter(hasSFARIscore==TRUE)
ggplotly(plot_data %>% ggplot(aes(MTcor, p, size=n)) + geom_smooth(color='gray', se=FALSE) +
geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) + geom_hline(yintercept=mean(plot_data$p), color='gray') +
xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
theme_minimal() + theme(legend.position = 'none'))
rm(i, this_row, new_row, plot_data)
Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control.
In all cases, the Eigengenes separate the behaviour between autism and control samples very clearly!
plot_EGs = function(module){
plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME',module)], 'Diagnosis' = datMeta$Diagnosis)
p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) + geom_boxplot() + theme_minimal() + theme(legend.position='none') +
ggtitle(paste0('Module ', module, ' (MTcor=',round(moduleTraitCor[paste0('ME',module),1],2),')'))
return(p)
}
p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])
grid.arrange(p1, p2, nrow=1)
rm(plot_EGs, p1, p2)
Selecting the modules with the highest correlation to Diagnosis, and, from them, the genes with the highest module membership-(absolute) gene significance
*Ordered by \(\frac{MM+|GS|}{2}\)
There aren’t that many SFARI genes in the top genes of the modules and not a single one belonging to scores 1 and 2
create_table = function(module){
top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>%
mutate(importance = (MM+abs(GS))/2) %>% arrange(by=-importance) %>% top_n(20)
return(top_genes)
}
top_genes_1 = create_table(top_modules[1])
kable(top_genes_1, caption=paste0('Top 10 genes for module ', top_modules[1], ' (MTcor = ',
round(moduleTraitCor[paste0('ME',top_modules[1]),1],2),')'))
| ID | external_gene_id | MM | GS | gene.score | importance |
|---|---|---|---|---|---|
| ENSG00000144369 | FAM171B | 0.9361619 | 0.9999000 | None | 0.9680309 |
| ENSG00000172380 | GNG12 | 0.9159014 | 0.9999000 | None | 0.9579007 |
| ENSG00000135299 | ANKRD6 | 0.9138902 | 0.9969959 | None | 0.9554431 |
| ENSG00000162734 | PEA15 | 0.9034754 | 0.9968303 | None | 0.9501529 |
| ENSG00000181467 | RAP2B | 0.8998701 | 0.9999000 | None | 0.9498850 |
| ENSG00000147402 | GABRQ | 0.9356259 | 0.9598506 | 5 | 0.9477382 |
| ENSG00000164199 | GPR98 | 0.8923921 | 0.9999000 | None | 0.9461460 |
| ENSG00000116117 | PARD3B | 0.9115571 | 0.9747438 | 3 | 0.9431504 |
| ENSG00000046653 | GPM6B | 0.9454595 | 0.9399552 | None | 0.9427074 |
| ENSG00000178252 | WDR6 | 0.9056134 | 0.9736618 | None | 0.9396376 |
| ENSG00000163110 | PDLIM5 | 0.9252966 | 0.9439713 | None | 0.9346339 |
| ENSG00000137033 | IL33 | 0.8700031 | 0.9959383 | None | 0.9329707 |
| ENSG00000135052 | GOLM1 | 0.9148458 | 0.9508269 | None | 0.9328364 |
| ENSG00000121766 | ZCCHC17 | 0.8768631 | 0.9845722 | None | 0.9307176 |
| ENSG00000007372 | PAX6 | 0.8708351 | 0.9850589 | None | 0.9279470 |
| ENSG00000077721 | UBE2A | 0.8449761 | 0.9999000 | None | 0.9224380 |
| ENSG00000145555 | MYO10 | 0.8648672 | 0.9761415 | None | 0.9205044 |
| ENSG00000204060 | FOXO6 | 0.8496752 | 0.9886059 | None | 0.9191406 |
| ENSG00000103876 | FAH | 0.8343395 | 0.9999000 | None | 0.9171197 |
| ENSG00000100568 | VTI1B | 0.9430318 | 0.8893725 | None | 0.9162022 |
top_genes_2 = create_table(top_modules[2])
kable(top_genes_2, caption=paste0('Top 10 genes for module ', top_modules[2], ' (MTcor = ',
round(moduleTraitCor[paste0('ME',top_modules[2]),1],2),')'))
| ID | external_gene_id | MM | GS | gene.score | importance |
|---|---|---|---|---|---|
| ENSG00000215397 | SCRT2 | 0.9325901 | -0.9999000 | None | 0.9662450 |
| ENSG00000141314 | RHBDL3 | 0.9162767 | -0.9903965 | None | 0.9533366 |
| ENSG00000188582 | PAQR9 | 0.9115714 | -0.9916901 | None | 0.9516307 |
| ENSG00000164588 | HCN1 | 0.9375144 | -0.9654055 | None | 0.9514599 |
| ENSG00000185133 | INPP5J | 0.9146103 | -0.9860537 | None | 0.9503320 |
| ENSG00000248905 | FMN1 | 0.9006433 | -0.9999000 | None | 0.9502717 |
| ENSG00000162694 | EXTL2 | 0.9130878 | -0.9839885 | None | 0.9485382 |
| ENSG00000113327 | GABRG2 | 0.9008442 | -0.9918740 | None | 0.9463591 |
| ENSG00000029534 | ANK1 | 0.8946657 | -0.9969839 | None | 0.9458248 |
| ENSG00000051382 | PIK3CB | 0.8974280 | -0.9941465 | None | 0.9457872 |
| ENSG00000159840 | ZYX | 0.8993040 | -0.9921196 | None | 0.9457118 |
| ENSG00000115977 | AAK1 | 0.8958363 | -0.9954082 | None | 0.9456223 |
| ENSG00000170049 | KCNAB3 | 0.9193674 | -0.9706054 | None | 0.9449864 |
| ENSG00000164114 | MAP9 | 0.8929287 | -0.9941216 | None | 0.9435252 |
| ENSG00000118596 | SLC16A7 | 0.9710280 | -0.9142353 | 5 | 0.9426317 |
| ENSG00000141750 | STAC2 | 0.8863954 | -0.9973965 | None | 0.9418960 |
| ENSG00000163624 | CDS1 | 0.8821721 | -0.9999000 | None | 0.9410360 |
| ENSG00000198825 | INPP5F | 0.8759157 | -0.9999000 | None | 0.9379078 |
| ENSG00000185760 | KCNQ5 | 0.9249319 | -0.9488275 | None | 0.9368797 |
| ENSG00000124198 | ARFGEF2 | 0.8773453 | -0.9961151 | None | 0.9367302 |
rm(create_table)
pca = datExpr %>% prcomp
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
mutate(alpha = ifelse(color %in% top_modules &
ID %in% c(as.character(top_genes_1$ID),
as.character(top_genes_2$ID)), 1, 0.05))
plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) +
theme_minimal() + ggtitle('Important genes identified through WGCNA')
Using the package anRichment
It was designed by Peter Langfelder explicitly to perform enrichmen analysis on WGCNA’s modules in brain-related experiments (mainly Huntington’s Disease)
It has packages with brain annotations:
BrainDiseaseCollection: A Brain Disease Gene Set Collection for anRichment
MillerAIBSCollection: (included in anRichment) Contains gene sets collected by Jeremy A. Miller at AIBS of various cell type and brain region marker sets, gene sets collected from expression studies of developing brain, as well as a collection of transcription factor (TF) targets from the original ChEA study
The tutorial says it’s an experimental package
It’s not on CRAN nor in Bioconductor
# Prepare dataset
# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID,
module = ifelse(genes_info$Module %in% top_modules, genes_info$Module, 'other')) %>%
filter(genes_info$Module!='gray')
# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=EA_dataset$ensembl_gene_id, mart=mart)
## Batch submitting query [=>------------------------] 6% eta: 14s
## Batch submitting query [==>-----------------------] 12% eta: 11s
## Batch submitting query [====>---------------------] 18% eta: 8s
## Batch submitting query [=====>--------------------] 24% eta: 7s
## Batch submitting query [=======>------------------] 29% eta: 7s
## Batch submitting query [========>-----------------] 35% eta: 6s
## Batch submitting query [==========>---------------] 41% eta: 5s
## Batch submitting query [===========>--------------] 47% eta: 5s
## Batch submitting query [=============>------------] 53% eta: 4s
## Batch submitting query [==============>-----------] 59% eta: 3s
## Batch submitting query [================>---------] 65% eta: 3s
## Batch submitting query [=================>--------] 71% eta: 2s
## Batch submitting query [===================>------] 76% eta: 2s Batch
## submitting query [====================>-----] 82% eta: 1s Batch submitting
## query [======================>---] 88% eta: 1s Batch submitting query
## [=======================>--] 94% eta: 0s
EA_dataset = EA_dataset %>% left_join(biomart_output, by='ensembl_gene_id')
for(tm in top_modules){
cat(paste0('\n',sum(EA_dataset$module==tm & is.na(EA_dataset$entrezgene)), ' genes from top module ',
tm, ' don\'t have an Entrez Gene ID'))
}
##
## 33 genes from top module #C19B00 don't have an Entrez Gene ID
## 17 genes from top module #00C1A2 don't have an Entrez Gene ID
rm(getinfo, mart, biomart_output, tm)
# Manual: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/GeneAnnotation/Tutorials/anRichment-Tutorial1.pdf
collectGarbage()
# EA_dataset = rbind(EA_dataset[EA_dataset$module!='other',], EA_dataset[EA_dataset$module=='other',][sample(sum(EA_dataset$module=='other'), 1000),])
# Prepare datasets
GO_col = buildGOcollection(organism = 'human', verbose = 0)
## Loading required package: org.Hs.eg.db
##
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
internal_col = internalCollection(organism = 'human')
MillerAIBS_col = MillerAIBSCollection(organism = 'human')
BrainDisease_col = BrainDiseaseCollection(organism = 'human')
combined_col = mergeCollections(GO_col, internal_col, MillerAIBS_col, BrainDisease_col)
# Print collections used
cat('Using collections: ')
## Using collections:
knownGroups(combined_col, sortBy = 'size')
## [1] "GO"
## [2] "GO.BP"
## [3] "GO.MF"
## [4] "GO.CC"
## [5] "JA Miller at AIBS"
## [6] "Chip-X enrichment analysis (ChEA)"
## [7] "Brain"
## [8] "JAM"
## [9] "Prenatal brain"
## [10] "Brain region markers"
## [11] "Cortex"
## [12] "Brain region marker enriched gene sets"
## [13] "WGCNA"
## [14] "BrainRegionMarkers"
## [15] "BrainRegionMarkers.HBA"
## [16] "BrainRegionMarkers.HBA.localMarker(top200)"
## [17] "Postnatal brain"
## [18] "ImmunePathways"
## [19] "Markers of cortex layers"
## [20] "BrainLists"
## [21] "Cell type markers"
## [22] "Germinal brain"
## [23] "BrainRegionMarkers.HBA.globalMarker(top200)"
## [24] "Accelerated evolution"
## [25] "Postmitotic brain"
## [26] "BrainLists.Blalock_AD"
## [27] "BrainLists.DiseaseGenes"
## [28] "BloodAtlases"
## [29] "Verge Disease Genes"
## [30] "BloodAtlases.Whitney"
## [31] "BrainLists.JAXdiseaseGene"
## [32] "BrainLists.MO"
## [33] "Age-associated genes"
## [34] "BrainLists.Lu_Aging"
## [35] "Cell type marker enriched gene sets"
## [36] "BrainLists.CA1vsCA3"
## [37] "BrainLists.MitochondrialType"
## [38] "BrainLists.MO.2+_26Mar08"
## [39] "BrainLists.MO.Sugino"
## [40] "BloodAtlases.Gnatenko2"
## [41] "BloodAtlases.Kabanova"
## [42] "BrainLists.Voineagu"
## [43] "StemCellLists"
## [44] "StemCellLists.Lee"
# Perform Enrichment Analysis
enrichment = enrichmentAnalysis(classLabels = EA_dataset$module, identifiers = EA_dataset$entrezgene,
refCollection = combined_col, useBackground = 'given',
threshold = 1e-4, thresholdType = 'Bonferroni',
getOverlapEntrez = FALSE, getOverlapSymbols = TRUE)
## enrichmentAnalysis: preparing data..
## ..working on label set 1 ..
kable(enrichment$enrichmentTable %>% filter(class==top_modules[1]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR,
effectiveClassSize, effectiveSetSize, nCommonGenes),
caption = paste0('Enriched terms for module ', top_modules[1], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[1]][1],4), ')'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|
| JAMiller.AIBS.000084 | Bergman glia | JA Miller at AIBS|Brain|Postnatal brain|Cell type markers | 0.00e+00 | 0e+00 | 2153 | 1211 | 341 |
| JAM:002807 | Cingulate Gyrus | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 2153 | 151 | 95 |
| JAMiller.AIBS.000085 | Cerebellar astrocytes | JA Miller at AIBS|Brain|Postnatal brain|Cell type markers | 0.00e+00 | 0e+00 | 2153 | 1429 | 378 |
| JAMiller.AIBS.000143 | Lowest in CP of 13-16 post-conception weeks human | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex | 0.00e+00 | 0e+00 | 2153 | 1433 | 371 |
| JAMiller.AIBS.000009 | VZ markers at 15-16 post-conception weeks | JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Germinal brain | 0.00e+00 | 0e+00 | 2153 | 1238 | 328 |
| JAMiller.AIBS.000097 | Cortical astrocytes | JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex | 0.00e+00 | 0e+00 | 2153 | 2289 | 505 |
| JAM:003078 | Temporal Lobe_IN_Cerebral Cortex | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 2153 | 163 | 81 |
| JAMiller.AIBS.000148 | Highest in VZ of 13-16 post-conception weeks human | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain | 0.00e+00 | 0e+00 | 2153 | 672 | 195 |
| JAMiller.AIBS.000112 | HippocampusWGCNA greenyellow SGZenriched astrocyte | JA Miller at AIBS|Brain|Postnatal brain|WGCNA | 0.00e+00 | 0e+00 | 2153 | 245 | 96 |
| JAM:002981 | Parahippocampal Gyrus | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 2153 | 153 | 72 |
| JAMiller.AIBS.000098 | Cortical oligodendrocytes (Olig2) | JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex | 0.00e+00 | 0e+00 | 2153 | 2254 | 447 |
| JAM:002731 | Amygdala | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 2153 | 160 | 71 |
| JAM:002773 | upAging_copperHomeostatisMT1 | JAM|BrainLists|BrainLists.Lu_Aging | 0.00e+00 | 0e+00 | 2153 | 119 | 59 |
| JAMiller.AIBS.000154 | Highest in VZ of E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain | 0.00e+00 | 0e+00 | 2153 | 1704 | 356 |
| JAMiller.AIBS.000193 | CortexWGCNA midfetal M23 | JA Miller at AIBS|Brain|Prenatal brain|WGCNA | 0.00e+00 | 0e+00 | 2153 | 998 | 232 |
| JAM:002909 | Insula | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 2153 | 150 | 63 |
| JAMiller.AIBS.000082 | Cerebellar oligodendrocytes (Olig2) | JA Miller at AIBS|Brain|Postnatal brain|Cell type markers | 0.00e+00 | 0e+00 | 2153 | 1489 | 306 |
| JAM:002852 | frontal part, superior bank of gyrus_IN_Cingulate Gyrus | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 2153 | 151 | 61 |
| JAMiller.AIBS.000506 | Genes bound by SUZ12 in MOUSE MESC from PMID 20075857 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 2153 | 3398 | 584 |
| JAMiller.AIBS.000106 | Genes enriched in the hippocampal SGZ in mouse | JA Miller at AIBS|Brain|Postnatal brain|Markers of cortex layers | 0.00e+00 | 0e+00 | 2153 | 336 | 96 |
| JAMiller.AIBS.000126 | Subependymal zone parenchymalAstrocytesFromDiencephalon(GFAP+/inDiencephalon) | JA Miller at AIBS|Brain|Postnatal brain|Cell type markers | 0.00e+00 | 0e+00 | 2153 | 107 | 45 |
| JAMiller.AIBS.000466 | Genes bound by SMARCA4 in MOUSE OLIGODENDROCYTES from PMID 23332759 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 2153 | 2052 | 378 |
| JAMiller.AIBS.000064 | CortexWGCNA 15-21 post-conception weeks C38 cellCycle SZ/VZenriched | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA | 0.00e+00 | 0e+00 | 2153 | 528 | 130 |
| JAMiller.AIBS.000364 | Genes bound by MTF2 in MOUSE MESC from PMID 20144788 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 2153 | 2301 | 414 |
| JAMiller.AIBS.000204 | RegionalWGCNA midfetal M34 | JA Miller at AIBS|Brain|Prenatal brain|WGCNA | 0.00e+00 | 0e+00 | 2153 | 417 | 109 |
| JAMiller.AIBS.000504 | Genes bound by SUZ12 in mouse MESC from PMID 18692474 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 2153 | 1372 | 270 |
| JAMiller.AIBS.000537 | Genes bound by TP63 in HUMAN EP156T from PMID 23658742 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 1.00e-07 | 0e+00 | 2153 | 2813 | 487 |
| JAMiller.AIBS.000505 | Genes bound by SUZ12 in MOUSE MESC from PMID 18974828 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 1.00e-07 | 0e+00 | 2153 | 1396 | 272 |
| JAM:003117 | noChangeAD_antigenProcessing_ribosome | JAM|BrainLists|BrainLists.Blalock_AD | 2.00e-07 | 0e+00 | 2153 | 244 | 73 |
| JAMiller.AIBS.000075 | GerminalZonesWGCNA 15-21 post-conception weeks G7 VZ-enriched | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA|Germinal brain | 9.00e-07 | 0e+00 | 2153 | 609 | 139 |
| GO:0007423 | sensory organ development | GO|GO.BP | 1.10e-06 | 0e+00 | 2153 | 480 | 116 |
| JAMiller.AIBS.000138 | VZ/SZ/IZ enriched in E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain | 2.40e-06 | 0e+00 | 2153 | 333 | 88 |
| GO:0007154 | cell communication | GO|GO.BP | 2.90e-06 | 0e+00 | 2153 | 5344 | 832 |
| JAMiller.AIBS.000110 | HippocampusWGCNA cyan SGZenriched upAge glia/gliogenesis | JA Miller at AIBS|Brain|Postnatal brain|WGCNA | 3.20e-06 | 0e+00 | 2153 | 151 | 51 |
| GO:0005886 | plasma membrane | GO|GO.CC | 7.10e-06 | 1e-07 | 2153 | 4326 | 689 |
| JAMiller.AIBS.000002 | SZo markers at 21 post-conception weeks | JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Germinal brain | 1.30e-05 | 1e-07 | 2153 | 254 | 71 |
| GO:0023052 | signaling | GO|GO.BP | 1.87e-05 | 2e-07 | 2153 | 5329 | 824 |
| JAMiller.AIBS.000163 | Genes decreasing in fetal and increasing in aging | JA Miller at AIBS|Brain|Age-associated genes|Cortex | 2.94e-05 | 2e-07 | 2153 | 65 | 29 |
| GO:0071944 | cell periphery | GO|GO.CC | 3.12e-05 | 2e-07 | 2153 | 4431 | 699 |
| GO:0009653 | anatomical structure morphogenesis | GO|GO.BP | 3.15e-05 | 2e-07 | 2153 | 2314 | 398 |
| GO:0048869 | cellular developmental process | GO|GO.BP | 3.47e-05 | 3e-07 | 2153 | 3653 | 590 |
| JAMiller.AIBS.000203 | RegionalWGCNA midfetal M33 Striatum+Thalamus+Cerebellum | JA Miller at AIBS|Brain|Prenatal brain|WGCNA | 3.76e-05 | 3e-07 | 2153 | 461 | 108 |
| JAMiller.AIBS.000062 | CortexWGCNA 15-21 post-conception weeks C36 SZ/VZenriched | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA | 4.77e-05 | 3e-07 | 2153 | 152 | 49 |
| GO:0003008 | system process | GO|GO.BP | 6.15e-05 | 4e-07 | 2153 | 1528 | 279 |
| GO:0048468 | cell development | GO|GO.BP | 7.27e-05 | 5e-07 | 2153 | 1929 | 339 |
| JAMiller.AIBS.000076 | GerminalZonesWGCNA 15-21 post-conception weeks G8 VZ-enriched | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA|Germinal brain | 8.01e-05 | 5e-07 | 2153 | 793 | 163 |
| GO:0007507 | heart development | GO|GO.BP | 8.84e-05 | 6e-07 | 2153 | 502 | 114 |
kable(enrichment$enrichmentTable %>% filter(class==top_modules[2]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR,
effectiveClassSize, effectiveSetSize, nCommonGenes),
caption = paste0('Enriched terms for module ', top_modules[2], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[2]][1],4), ')'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|
| JAM:002744 | Autism_differential_expression_across_at_least_one_comparison | JAM|BrainLists|BrainLists.Voineagu | 0.00e+00 | 0e+00 | 1327 | 760 | 166 |
| JAMiller.AIBS.000142 | Highest in CP of 13-16 post-conception weeks human | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain | 0.00e+00 | 0e+00 | 1327 | 1211 | 208 |
| JAM:002967 | Occipital Lobe_IN_Cerebral Cortex | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 1327 | 155 | 58 |
| JAMiller.AIBS.000506 | Genes bound by SUZ12 in MOUSE MESC from PMID 20075857 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 1327 | 3398 | 418 |
| JAM:002985 | Parietal Lobe_IN_Cerebral Cortex | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 1327 | 120 | 48 |
| JAM:002986 | parietal part, inferior bank of gyrus_IN_Cingulate Gyrus | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 1327 | 122 | 48 |
| JAM:002824 | Dentate Nucleus_IN_Cerebellar Nucleus | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 1327 | 166 | 55 |
| JAMiller.AIBS.000569 | WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 | JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA | 0.00e+00 | 0e+00 | 1327 | 4047 | 468 |
| JAMiller.AIBS.000155 | Lowest in VZ of E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex | 0.00e+00 | 0e+00 | 1327 | 1657 | 236 |
| JAMiller.AIBS.000150 | Highest in CP of E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain | 0.00e+00 | 0e+00 | 1327 | 1265 | 194 |
| GO:0005215 | transporter activity | GO|GO.MF | 0.00e+00 | 0e+00 | 1327 | 997 | 153 |
| GO:0022857 | transmembrane transporter activity | GO|GO.MF | 0.00e+00 | 0e+00 | 1327 | 915 | 142 |
| GO:0097458 | neuron part | GO|GO.CC | 0.00e+00 | 0e+00 | 1327 | 1611 | 214 |
| GO:0015318 | inorganic molecular entity transmembrane transporter activity | GO|GO.MF | 0.00e+00 | 0e+00 | 1327 | 720 | 116 |
| GO:0015075 | ion transmembrane transporter activity | GO|GO.MF | 0.00e+00 | 0e+00 | 1327 | 773 | 122 |
| GO:0045202 | synapse | GO|GO.CC | 1.00e-07 | 0e+00 | 1327 | 1112 | 156 |
| GO:0022890 | inorganic cation transmembrane transporter activity | GO|GO.MF | 3.00e-07 | 0e+00 | 1327 | 525 | 90 |
| JAM:003016 | downAD_synapticTransmission | JAM|BrainLists|BrainLists.Blalock_AD | 3.00e-07 | 0e+00 | 1327 | 89 | 30 |
| JAMiller.AIBS.000504 | Genes bound by SUZ12 in mouse MESC from PMID 18692474 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 5.00e-07 | 0e+00 | 1327 | 1372 | 181 |
| GO:0006811 | ion transport | GO|GO.BP | 6.00e-07 | 0e+00 | 1327 | 1483 | 192 |
| GO:0008324 | cation transmembrane transporter activity | GO|GO.MF | 6.00e-07 | 0e+00 | 1327 | 566 | 94 |
| GO:0015079 | potassium ion transmembrane transporter activity | GO|GO.MF | 7.00e-07 | 0e+00 | 1327 | 145 | 39 |
| GO:0044456 | synapse part | GO|GO.CC | 7.00e-07 | 0e+00 | 1327 | 889 | 130 |
| JAMiller.AIBS.000078 | Cerebellar granule cells | JA Miller at AIBS|Brain|Postnatal brain|Cell type markers | 1.00e-06 | 0e+00 | 1327 | 1513 | 194 |
| GO:0034220 | ion transmembrane transport | GO|GO.BP | 1.00e-06 | 0e+00 | 1327 | 1054 | 147 |
| JAM:003072 | Tail of Caudate Nucleus_IN_Striatum | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 1.10e-06 | 0e+00 | 1327 | 160 | 41 |
| JAMiller.AIBS.000123 | HippocampusWGCNA turquoise DGenriched upAge | JA Miller at AIBS|Brain|Postnatal brain|WGCNA | 1.10e-06 | 0e+00 | 1327 | 1103 | 152 |
| GO:0015672 | monovalent inorganic cation transport | GO|GO.BP | 1.30e-06 | 0e+00 | 1327 | 455 | 80 |
| GO:1902495 | transmembrane transporter complex | GO|GO.CC | 1.30e-06 | 0e+00 | 1327 | 296 | 60 |
| GO:1990351 | transporter complex | GO|GO.CC | 1.40e-06 | 0e+00 | 1327 | 304 | 61 |
| GO:0046873 | metal ion transmembrane transporter activity | GO|GO.MF | 1.50e-06 | 0e+00 | 1327 | 391 | 72 |
| GO:0007268 | chemical synaptic transmission | GO|GO.BP | 2.20e-06 | 0e+00 | 1327 | 640 | 101 |
| GO:0098916 | anterograde trans-synaptic signaling | GO|GO.BP | 2.20e-06 | 0e+00 | 1327 | 640 | 101 |
| GO:0006813 | potassium ion transport | GO|GO.BP | 2.50e-06 | 0e+00 | 1327 | 212 | 48 |
| JAMiller.AIBS.000364 | Genes bound by MTF2 in MOUSE MESC from PMID 20144788 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 3.40e-06 | 0e+00 | 1327 | 2301 | 268 |
| GO:0099536 | synaptic signaling | GO|GO.BP | 3.50e-06 | 0e+00 | 1327 | 654 | 102 |
| GO:0099537 | trans-synaptic signaling | GO|GO.BP | 4.50e-06 | 0e+00 | 1327 | 648 | 101 |
| JAM:002847 | facial motor nucleus_IN_Pontine Tegmentum | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 6.30e-06 | 1e-07 | 1327 | 155 | 39 |
| JAM:003047 | Somatic | JAM|BrainLists|BrainLists.MitochondrialType | 6.40e-06 | 1e-07 | 1327 | 142 | 37 |
| GO:0071804 | cellular potassium ion transport | GO|GO.BP | 7.60e-06 | 1e-07 | 1327 | 190 | 44 |
| GO:0071805 | potassium ion transmembrane transport | GO|GO.BP | 7.60e-06 | 1e-07 | 1327 | 190 | 44 |
| GO:0022839 | ion gated channel activity | GO|GO.MF | 7.90e-06 | 1e-07 | 1327 | 301 | 59 |
| JAM:002763 | downAD_metalIonTransport_glycoprotein | JAM|BrainLists|BrainLists.Blalock_AD | 1.05e-05 | 1e-07 | 1327 | 280 | 56 |
| GO:0005887 | integral component of plasma membrane | GO|GO.CC | 1.08e-05 | 1e-07 | 1327 | 1393 | 178 |
| GO:0015077 | monovalent inorganic cation transmembrane transporter activity | GO|GO.MF | 1.11e-05 | 1e-07 | 1327 | 327 | 62 |
| GO:0005249 | voltage-gated potassium channel activity | GO|GO.MF | 1.16e-05 | 1e-07 | 1327 | 78 | 26 |
| GO:0005244 | voltage-gated ion channel activity | GO|GO.MF | 1.30e-05 | 1e-07 | 1327 | 179 | 42 |
| GO:0022832 | voltage-gated channel activity | GO|GO.MF | 1.30e-05 | 1e-07 | 1327 | 179 | 42 |
| GO:0022803 | passive transmembrane transporter activity | GO|GO.MF | 1.48e-05 | 1e-07 | 1327 | 402 | 71 |
| GO:0034702 | ion channel complex | GO|GO.CC | 1.54e-05 | 1e-07 | 1327 | 275 | 55 |
| JAMiller.AIBS.000570 | WGCNA Olivedrab2ModuleGenes with enriched ELAVL2 targets | JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA | 1.93e-05 | 2e-07 | 1327 | 480 | 80 |
| JAM:002805 | Cerebral Cortex | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 2.12e-05 | 2e-07 | 1327 | 161 | 39 |
| GO:0005216 | ion channel activity | GO|GO.MF | 2.58e-05 | 2e-07 | 1327 | 374 | 67 |
| GO:0031226 | intrinsic component of plasma membrane | GO|GO.CC | 2.64e-05 | 2e-07 | 1327 | 1459 | 183 |
| GO:0022836 | gated channel activity | GO|GO.MF | 2.65e-05 | 2e-07 | 1327 | 310 | 59 |
| GO:0098660 | inorganic ion transmembrane transport | GO|GO.BP | 2.84e-05 | 2e-07 | 1327 | 742 | 109 |
| GO:0015267 | channel activity | GO|GO.MF | 3.36e-05 | 3e-07 | 1327 | 401 | 70 |
| GO:0043005 | neuron projection | GO|GO.CC | 3.76e-05 | 3e-07 | 1327 | 1215 | 158 |
| GO:0097060 | synaptic membrane | GO|GO.CC | 4.05e-05 | 3e-07 | 1327 | 411 | 71 |
| GO:0071944 | cell periphery | GO|GO.CC | 5.44e-05 | 4e-07 | 1327 | 4431 | 453 |
| JAMiller.AIBS.000134 | Layer4 enriched in adult macaque cortex | JA Miller at AIBS|Brain|Postnatal brain|Markers of cortex layers|Cortex | 5.63e-05 | 4e-07 | 1327 | 139 | 35 |
| GO:0005886 | plasma membrane | GO|GO.CC | 7.66e-05 | 5e-07 | 1327 | 4326 | 443 |
| GO:0022838 | substrate-specific channel activity | GO|GO.MF | 8.14e-05 | 5e-07 | 1327 | 384 | 67 |
| JAM:002927 | Long Insular Gyri_IN_Insula | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 9.22e-05 | 6e-07 | 1327 | 109 | 30 |
Save Enrichment Analysis results
save(enrichment, file='./../Data/enrichmentAnalysis.RData')
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Scientific Linux 7.6 (Nitrogen)
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] org.Hs.eg.db_3.10.0
## [2] BrainDiseaseCollection_1.00
## [3] anRichment_1.01-2
## [4] TxDb.Mmusculus.UCSC.mm10.knownGene_3.10.0
## [5] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [6] GenomicFeatures_1.38.2
## [7] GenomicRanges_1.38.0
## [8] GenomeInfoDb_1.22.0
## [9] anRichmentMethods_0.90-1
## [10] WGCNA_1.68
## [11] fastcluster_1.1.25
## [12] dynamicTreeCut_1.63-1
## [13] GO.db_3.10.0
## [14] AnnotationDbi_1.48.0
## [15] IRanges_2.20.2
## [16] S4Vectors_0.24.3
## [17] Biobase_2.46.0
## [18] BiocGenerics_0.32.0
## [19] biomaRt_2.42.0
## [20] knitr_1.24
## [21] doParallel_1.0.15
## [22] iterators_1.0.12
## [23] foreach_1.4.7
## [24] polycor_0.7-10
## [25] expss_0.10.1
## [26] GGally_1.4.0
## [27] gridExtra_2.3
## [28] viridis_0.5.1
## [29] viridisLite_0.3.0
## [30] RColorBrewer_1.1-2
## [31] dendextend_1.13.3
## [32] plotly_4.9.2
## [33] glue_1.3.1
## [34] reshape2_1.4.3
## [35] forcats_0.4.0
## [36] stringr_1.4.0
## [37] dplyr_0.8.3
## [38] purrr_0.3.3
## [39] readr_1.3.1
## [40] tidyr_1.0.2
## [41] tibble_2.1.3
## [42] ggplot2_3.2.1
## [43] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.5
## [3] Hmisc_4.2-0 BiocFileCache_1.10.2
## [5] plyr_1.8.5 lazyeval_0.2.2
## [7] splines_3.6.0 crosstalk_1.0.0
## [9] BiocParallel_1.20.1 robust_0.4-18.2
## [11] digest_0.6.24 htmltools_0.4.0
## [13] fansi_0.4.1 magrittr_1.5
## [15] checkmate_1.9.4 memoise_1.1.0
## [17] fit.models_0.5-14 cluster_2.0.8
## [19] annotate_1.64.0 Biostrings_2.54.0
## [21] modelr_0.1.5 matrixStats_0.55.0
## [23] askpass_1.1 prettyunits_1.0.2
## [25] colorspace_1.4-1 blob_1.2.1
## [27] rvest_0.3.5 rappdirs_0.3.1
## [29] rrcov_1.4-7 haven_2.2.0
## [31] xfun_0.8 crayon_1.3.4
## [33] RCurl_1.95-4.12 jsonlite_1.6
## [35] genefilter_1.68.0 impute_1.60.0
## [37] survival_2.44-1.1 gtable_0.3.0
## [39] zlibbioc_1.32.0 XVector_0.26.0
## [41] DelayedArray_0.12.2 DEoptimR_1.0-8
## [43] scales_1.1.0 mvtnorm_1.0-11
## [45] DBI_1.1.0 Rcpp_1.0.3
## [47] xtable_1.8-4 progress_1.2.2
## [49] htmlTable_1.13.1 foreign_0.8-71
## [51] bit_1.1-15.2 preprocessCore_1.48.0
## [53] Formula_1.2-3 htmlwidgets_1.5.1
## [55] httr_1.4.1 ellipsis_0.3.0
## [57] acepack_1.4.1 farver_2.0.3
## [59] pkgconfig_2.0.3 reshape_0.8.8
## [61] XML_3.99-0.3 nnet_7.3-12
## [63] dbplyr_1.4.2 locfit_1.5-9.1
## [65] later_1.0.0 labeling_0.3
## [67] tidyselect_0.2.5 rlang_0.4.4
## [69] munsell_0.5.0 cellranger_1.1.0
## [71] tools_3.6.0 cli_2.0.1
## [73] generics_0.0.2 RSQLite_2.2.0
## [75] broom_0.5.4 fastmap_1.0.1
## [77] evaluate_0.14 yaml_2.2.0
## [79] bit64_0.9-7 fs_1.3.1
## [81] robustbase_0.93-5 nlme_3.1-139
## [83] mime_0.9 xml2_1.2.2
## [85] compiler_3.6.0 rstudioapi_0.10
## [87] curl_4.3 reprex_0.3.0
## [89] geneplotter_1.64.0 pcaPP_1.9-73
## [91] stringi_1.4.6 highr_0.8
## [93] lattice_0.20-38 Matrix_1.2-17
## [95] vctrs_0.2.2 pillar_1.4.3
## [97] lifecycle_0.1.0 data.table_1.12.8
## [99] bitops_1.0-6 httpuv_1.5.2
## [101] rtracklayer_1.46.0 R6_2.4.1
## [103] latticeExtra_0.6-28 promises_1.1.0
## [105] codetools_0.2-16 MASS_7.3-51.4
## [107] assertthat_0.2.1 SummarizedExperiment_1.16.1
## [109] DESeq2_1.26.0 openssl_1.4.1
## [111] withr_2.1.2 GenomicAlignments_1.22.1
## [113] Rsamtools_2.2.2 GenomeInfoDbData_1.2.2
## [115] hms_0.5.3 grid_3.6.0
## [117] rpart_4.1-15 rmarkdown_1.14
## [119] Cairo_1.5-10 shiny_1.4.0
## [121] lubridate_1.7.4 base64enc_0.1-3